# Print the integers 0 through 4; range(5) stops before 5.
for i in range(5):
    print(i)
0
1
2
3
4
# Print the integers 1 through 5; the stop value is written as 5+1 because
# range() excludes its upper bound.
for i in range(1, 5+1):
    print(i)
1
2
3
4
5
# Iterate directly over the list's elements instead of over indices.
L = [1, 2, 3, 4, 5]
for i in L:
    print(i)
1
2
3
4
5
pip install bs4
from bs4 import BeautifulSoup as bs
# Sample HTML fragment used as parser input further down: three sibling <div>
# elements with classes "nrec", "title" and "meta". The "title" div wraps an
# <a> link, and the "meta" div wraps a nested "author" div.
s = '''<div class="nrec" style="-webkit-text-stroke-width:0px;color:rgb(187, 187, 187);display:inline-block;float:left;font-family:serif;font-size:medium;font-style:normal;font-variant-caps:normal;font-variant-ligatures:normal;font-weight:400;letter-spacing:normal;min-height:1em;orphans:2;padding:0.5ex 0px 0px;text-align:right;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;vertical-align:middle;white-space:normal;widows:2;width:4ex;word-spacing:0px;"><span style="color:rgb(255,255,102);"><span class="hl f3">38</span></span></div>
<div class="title" style="-webkit-text-stroke-width:0px;color:rgb(187, 187, 187);display:block;font-family:Times;font-size:medium;font-style:normal;font-variant-caps:normal;font-variant-ligatures:normal;font-weight:400;letter-spacing:normal;margin:0px 5ex;min-height:1em;orphans:2;padding:0.5ex 0px 0px;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;vertical-align:middle;white-space:normal;widows:2;word-spacing:0px;"><a style="color:rgb(170, 170, 170);text-decoration:none;" target="_blank" rel="noopener noreferrer" href="https://www.ptt.cc/bbs/Baseball/M.1726184897.A.8E7.html">[問題] 平野怎麼寫昨晚的報告</a></div>
<div class="meta" style="-webkit-text-stroke-width:0px;color:rgb(187, 187, 187);display:block;font-family:Times;font-size:medium;font-style:normal;font-variant-caps:normal;font-variant-ligatures:normal;font-weight:400;letter-spacing:normal;margin:0px 0px 0px 5ex;min-height:1em;orphans:2;text-align:start;text-decoration-color:initial;text-decoration-style:initial;text-decoration-thickness:initial;text-indent:0px;text-transform:none;vertical-align:middle;white-space:normal;widows:2;word-spacing:0px;">
<div class="author" style="display:inline-block;font-family:Inconsolata, serif;padding:0.5ex 0px;">kcola</div>
</div>'''
# Bare expression: echoes the raw string when run as a notebook cell; it has
# no effect when run as a plain script.
s
| html的結構 | html | head | body | div | a |  |
|---|---|---|---|---|---|---|
| `<html>` |  |  |  |  |  |  |
| `<head>` |  |  |  |  |  |  |
| `</head>` |  |  |  |  |  |  |
| `<body>` |  |  |  |  |  |  |
| `<div>` |  |  |  |  |  |  |
| `<a>` |  |  |  |  |  |  |
| `</a>` |  |  |  |  |  |  |
| `</div>` |  |  |  |  |  |  |
| `</body>` |  |  |  |  |  |  |
| `</html>` |  |  |  |  |  |  |
# Parse the fragment with the html5lib tree builder. As a bare expression the
# resulting soup is only displayed (in a notebook) and then discarded.
bs(s, 'html5lib')
# Parse again and keep the tree for the selector experiments that follow.
tmp1 = bs(s, 'html5lib')
# select() takes a CSS selector. html5lib is documented to build a complete
# document, so <html>/<head>/<body> are expected to match even though `s`
# holds only <div> fragments.
# NOTE(review): confirm against the installed bs4/html5lib versions.
tmp1.select('html')
tmp1.select('head')
tmp1.select('body')
# Type selectors: every <div> in the tree, then every <a>.
tmp1.select('div')
tmp1.select('a')
# Type + class selector: <div> elements whose class includes "title".
tmp1.select('div.title')
# Child combinator: <a> elements that are direct children of such a div.
tmp1.select('div.title>a')
# Keep the matches so the loops further down can iterate over them.
a = tmp1.select('div.title>a')
# Inspect the container type that select() returned.
type(a)
list
# Number of elements matched by the 'div.title>a' selector.
len(a)
1
# Print each matched element; printing a tag shows its HTML markup.
for i in a:
    print(i)
# Print selected attributes of each matched link. Tag.get() returns None
# rather than raising when the attribute is missing.
for i in a:
    print(i.get('href'))
    # NOTE(review): bs4 documents `rel` as a multi-valued attribute, so this
    # is expected to print a list rather than a single string -- confirm.
    print(i.get('rel'))
    print(i.get('style'))
# Print only the text content of each matched link, without the markup.
for i in a:
    print(i.text)